Tidy Tuesday week 1 is a data set on the average cost of tuition in the US over time. The data is in wide untidy form and must be tidied so it can then be plotted. To avoid problems with importing the data as an .xlsx file and R changing the cost to a character variable I changed the format in Excel and saved it as a .csv file.
tuition_data <- read.csv('us_avg_tuition.csv', stringsAsFactors = FALSE, header=TRUE, sep=",", na.strings="M") %>% mutate(percent=(X2015.16-X2010.11)/X2015.16*100)
head(tuition_data)
## State X2004.05 X2005.06 X2006.07 X2007.08 X2008.09 X2009.10
## 1 Alabama 5683 5841 5753 6008 6475 7189
## 2 Alaska 4328 4633 4919 5070 5075 5455
## 3 Arizona 5138 5416 5481 5682 6058 7263
## 4 Arkansas 5772 6082 6232 6415 6417 6627
## 5 California 5286 5528 5335 5672 5898 7259
## 6 Colorado 4704 5407 5596 6227 6284 6948
## X2010.11 X2011.12 X2012.13 X2013.14 X2014.15 X2015.16 percent
## 1 8071 8452 9098 9359 9496 9751 17.22900
## 2 5759 5762 6026 6012 6149 6571 12.35733
## 3 8840 9967 10134 10296 10414 10646 16.96412
## 4 6901 7029 7287 7408 7606 7867 12.27914
## 5 8194 9436 9361 9274 9187 9270 11.60734
## 6 7748 8316 8793 9293 9299 9748 20.51703
The gather function allows you to make the data set go from wide to long, i.e. instead of having a column for each financial year there will be a financial year column and a column containing the cost.
tuition_tidy <- tuition_data %>% gather(X2004.05:X2015.16, key="year", value = "cost") %>% mutate(year=str_replace(year, "X2004.05", "2004-05")) %>% mutate(year=str_replace(year, "X2004.05", "2004-05")) %>% mutate(year=str_replace(year, "X2005.06", "2005-06")) %>% mutate(year=str_replace(year, "X2006.07", "2006-07")) %>% mutate(year=str_replace(year, "X2007.08", "2007-08")) %>% mutate(year=str_replace(year, "X2008.09", "2008-09")) %>% mutate(year=str_replace(year, "X2009.10", "2009-10")) %>% mutate(year=str_replace(year, "X2010.11", "2010-11")) %>% mutate(year=str_replace(year, "X2011.12", "2011-12")) %>% mutate(year=str_replace(year, "X2012.13", "2012-13")) %>% mutate(year=str_replace(year, "X2013.14", "2013-14")) %>% mutate(year=str_replace(year, "X2014.15", "2014-15")) %>% mutate(year=str_replace(year, "X2015.16", "2015-16"))
head(tuition_tidy)
## State percent year cost
## 1 Alabama 17.22900 2004-05 5683
## 2 Alaska 12.35733 2004-05 4328
## 3 Arizona 16.96412 2004-05 5138
## 4 Arkansas 12.27914 2004-05 5772
## 5 California 11.60734 2004-05 5286
## 6 Colorado 20.51703 2004-05 4704
The data is now tidy and ready to graph. Here is a bar chart for the most recent year by state.
tuition_1516 <- filter(tuition_tidy, year == "2015-16")
arrange(tuition_1516,desc(cost))
## State percent year cost
## 1 New Hampshire 13.7401055 2015-16 15160
## 2 Vermont 8.9241646 2015-16 14993
## 3 Pennsylvania 7.3982829 2015-16 13395
## 4 New Jersey 4.1419229 2015-16 13303
## 5 Illinois 8.9544317 2015-16 13189
## 6 Michigan 7.4722709 2015-16 11991
## 7 Virginia 18.4110331 2015-16 11819
## 8 South Carolina 6.7704807 2015-16 11816
## 9 Delaware 9.7807468 2015-16 11676
## 10 Massachusetts 10.3727994 2015-16 11588
## 11 Connecticut 13.7755550 2015-16 11397
## 12 Rhode Island 10.4389816 2015-16 11390
## 13 Minnesota 5.1241806 2015-16 10831
## 14 Arizona 16.9641180 2015-16 10646
## 15 Washington 12.7818818 2015-16 10288
## 16 Ohio 2.7755983 2015-16 10196
## 17 Hawaii 19.7641278 2015-16 10175
## 18 Alabama 17.2290022 2015-16 9751
## 19 Colorado 20.5170291 2015-16 9748
## 20 Maine -2.2876841 2015-16 9573
## 21 Kentucky 13.9646702 2015-16 9567
## 22 Oregon 13.1576139 2015-16 9371
## 23 California 11.6073355 2015-16 9270
## 24 Tennessee 22.9191407 2015-16 9263
## 25 Maryland 7.5630252 2015-16 9163
## 26 Indiana 4.1228070 2015-16 9120
## 27 Texas 7.0198530 2015-16 9117
## 28 Wisconsin 4.2427680 2015-16 8815
## 29 Missouri 7.0644559 2015-16 8564
## 30 Kansas 14.9589683 2015-16 8530
## 31 Georgia 23.9019770 2015-16 8447
## 32 South Dakota 14.1402855 2015-16 8055
## 33 Iowa 0.5712835 2015-16 7877
## 34 Louisiana 34.1760894 2015-16 7871
## 35 Arkansas 12.2791407 2015-16 7867
## 36 North Dakota 6.4646202 2015-16 7688
## 37 New York 17.0329670 2015-16 7644
## 38 Nebraska 5.0341746 2015-16 7608
## 39 Oklahoma 15.5167785 2015-16 7450
## 40 West Virginia 20.2203319 2015-16 7171
## 41 Mississippi 18.9170281 2015-16 7147
## 42 North Carolina 16.4348200 2015-16 6973
## 43 Idaho 14.4177178 2015-16 6818
## 44 Nevada 12.6743663 2015-16 6667
## 45 Alaska 12.3573277 2015-16 6571
## 46 Utah 15.4329719 2015-16 6363
## 47 Florida 13.3490566 2015-16 6360
## 48 New Mexico 11.6601101 2015-16 6355
## 49 Montana 3.2750748 2015-16 6351
## 50 Wyoming 12.1038642 2015-16 4891
bar <- ggplot(tuition_1516, aes(x = reorder(State, cost), y = cost, fill = cost)) + geom_bar(stat = "identity") + coord_flip() + scale_fill_viridis(direction=-1) + theme_minimal() + labs(x="State", y="Cost ($US)",
title="Average tuition cost, USA, 2015-16",
caption="Source: https://onlinembapage.com/average-tuition-and-educational-attainment-in-the-united-states/")
bar
New Hampshire had the highest and Wyoming had the lowest average cost in 2015-16.
Lets have a look at how the cost of tuition has changed over time in each state.
ggplot(tuition_tidy, aes(x = factor(year), y = cost, group = State)) + geom_line(color = "thistle3", size = 1.25) + theme_minimal() + labs(x="Year", y="Cost ($US)",
title="Average tuition cost, USA, 2015-16",
caption="Source: https://onlinembapage.com/average-tuition-and-educational-attainment-in-the-united-states/")
p1 <- ggplot(tuition_tidy, aes(x = factor(year), y = cost, group = State)) + geom_line(color = "thistle3", size = 1.25) + theme_minimal() + labs(x="Year", y="Cost ($US)",
title="Average tuition cost, USA, 2015-16",
caption="Source: https://onlinembapage.com/average-tuition-and-educational-attainment-in-the-united-states/")
ggplotly(p1) %>% layout(autosize = F, width = 800, height = 600)
It looks like there is an overall trend towards increasing cost over time, except for maybe a couple of states.
ggplot(tuition_tidy, aes(year, cost, group = State)) +
geom_line(color = "steelblue") +
facet_geo(~ State, grid = "us_state_grid2", label = "name") + theme ( axis.text.x = element_blank()) +
labs(title = "Average tuition cost, USA, 2004-05 to 2015-16",
caption = "Source: https://onlinembapage.com/average-tuition-and-educational-attainment-in-the-united-states/",
x = "Year",
y = "Cost ($US)") +
theme(strip.text.x = element_text(size = 6))
Average tuition costs in the North East are increasing over time. States in the Mid West are not increasing as much as other states.
s <- filter(tuition_tidy, State == "New Hampshire" | State == "Hawaii" | State == "Vermont" | State == "Wyoming")
ggplot(s, aes(x = cost, y = State)) + geom_density_ridges(scale = 1, color="#FFFFFF", fill="#56B4E9") + theme_minimal() + labs(subtitle = "Average tuition cost distribution, Selected States, USA, 2004-05 to 2015-16",
caption = "Source: https://onlinembapage.com/average-tuition-and-educational-attainment-in-the-united-states/")
states <- map_data("state") #Get US state geo information
data <- tuition_1516 %>% mutate(region = tolower(State))
# Join the values to the map using region
us_map <- left_join(states, data)
#generate the map
m2 <- ggplot() + geom_map(aes(x = long, y = lat, map_id = region), data = us_map,
map = us_map, fill = "#ffffff", color = "#ffffff", size = 0.15) + geom_map(aes(fill = percent, map_id = region),
map = us_map, data = us_map,
color = "#ffffff", size = 0.15) +
viridis::scale_fill_viridis(name="Percent change", direction=-1) +
coord_map('albers', lat0 = 39, lat1 = 45) +
ggthemes::theme_map() +
theme(legend.position="bottom") +
labs(title = "Average tuition cost percent change, USA, 2010-11 to 2015-16",
caption = "Source: https://onlinembapage.com/average-tuition-and-educational-attainment-in-the-united-states/")
m2
m3 <- ggplot() + geom_map(aes(x = long, y = lat, map_id = region), data = us_map,
map = us_map, fill = "#ffffff", color = "#ffffff", size = 0.15) + geom_map(aes(fill = cost, map_id = region),
map = us_map, data = us_map,
color = "#ffffff", size = 0.15) +
viridis::scale_fill_viridis(name="Cost", direction=-1) +
coord_map('albers', lat0 = 39, lat1 = 45) +
ggthemes::theme_map() +
theme(legend.position="bottom") +
labs(title = "Average tuition cost, USA, 2015-16",
caption = "Source: https://onlinembapage.com/average-tuition-and-educational-attainment-in-the-united-states/")
m3